Load data

to do: * meta data summary? * filter options identical to insights * annotate variant and amplicons so I can easily select

filename <- system.file("extdata", "ABseq021.h5", package = "TapestriR")

# ideally would just start by loading filtered H5, but for now will send filtering list to function to mimic
variants = read_h5(filename = filename, assay_name = ASSAY_NAME_VARIANT, min_mutation_rate = 0.01)

filtered_variants = filter_variants(variants)

vaf=round(filtered_variants@data_layers$AD/filtered_variants@data_layers$DP, 3)
vaf[is.na(vaf)] <- 0
filtered_variants = add_data_layer(filtered_variants,'VAF',vaf)

protein = read_h5(filename,assay_name = ASSAY_NAME_PROTEIN)
protein_counts_norm = protein@data_layers$read_counts %>% clr_by_feature() %>% as_tibble(rownames = NA)
protein = add_data_layer(protein,'normalized',protein_counts_norm)

cnv = read_h5(filename,assay_name = ASSAY_NAME_READ_COUNT)

ab21 = create_moo(experiment_name = basename(filename), cell_annotations = filtered_variants@cell_annotations)
ab21 = add_assay(ab21,filtered_variants)
ab21 = add_assay(ab21,cnv,keep_common_cells = TRUE)
ab21 = add_assay(ab21,protein, keep_common_cells = TRUE)

experiment = ab21

X-Y plots

##################
# select the Proteins to plot on X and Y
##################

#protein_x = 'CD34'
#protein_y = 'CD38'

protein_x = 1
protein_y = 2

##################
# select 1 or more features to color by
# color_by should be a vector of column header you want to color by
##################

#all proteins
color_by = experiment@protein@data_layers$normalized

#select a few proteins
#color_by =  experiment@protein@data_layers$normalized %>% select('CD110','CD117')

#select a few variant
#color_by =  experiment@dna@data_layers$NGT %>% select(1:10) %>% mutate_all(as_factor) %>% mutate_all(recode_genotypes)


p  = tapestri_scatterplot(x = experiment@protein@data_layers$normalized[[protein_x]], 
                 y= experiment@protein@data_layers$normalized[[protein_y]], 
                 color_by = color_by)
p = p + xlab(protein_x) + ylab(protein_y)
p = p + theme_bw()  + scale_colour_gradient2(low="yellow", mid='grey', high="darkred") 
p

#select a few variant
color_by =  experiment@dna@data_layers$NGT %>% select(1:10) %>% mutate_all(as_factor) %>% mutate_all(recode_genotypes)
## Warning: Unknown levels in `f`: 2
## Warning: Unknown levels in `f`: 3
## Warning: Unknown levels in `f`: 2
p  = tapestri_scatterplot(x = experiment@protein@data_layers$normalized[[protein_x]], 
                 y= experiment@protein@data_layers$normalized[[protein_y]], 
                 color_by = color_by)
p = p + xlab(protein_x) + ylab(protein_y)
p = p + theme_bw()  
p

Figure out how many clusters?

To do:

Cluster SNV

clustering by kmeans and louvain on umap projection and raw features

##################
# do demension reduction and clustering a few different ways
##################

#dimensional reduction using umap
set.seed(111)
umap_values <- umap(experiment@dna@data_layers$VAF, scale=TRUE, metric="manhattan", init="laplacian", pca=20) 

umap_layer = tibble(    
      x = umap_values[,1],
      y = umap_values[,2]
)

experiment@dna = add_analysis_layer(assay = experiment@dna, layer_name = 'umap_vaf', umap_layer)

######
# Hold all the different customer labels in a single structure
#####
cluster_by = experiment@dna@analysis_layers$umap_vaf
clusters = list()

#### do the clustering

for(i in 2:5) {
  kmean_values <- kmeans(cluster_by, i ,iter.max=500)
  clusters[[paste0('umap.kmean.cluster.',i)]] = as_factor(kmean_values$cluster)

  kmean_values <- kmeans(cluster_by, i ,iter.max=500)
  clusters[[paste0('feature.kmean.cluster.',i)]] = as_factor(kmean_values$cluster)

}

graph_values <- buildSNNGraph(t(cluster_by), k=150)
## Warning in (function (jobs, data, centers, info, distance, k, get.index, : tied
## distances detected in nearest-neighbor calculation
louvain_clust <- igraph::cluster_louvain(graph_values)$membership

clusters[['umap.louvain.cluster']] = as_factor(louvain_clust)

graph_values <- buildSNNGraph(t(cluster_by), k=150)
## Warning in (function (jobs, data, centers, info, distance, k, get.index, : tied
## distances detected in nearest-neighbor calculation
louvain_clust <- igraph::cluster_louvain(graph_values)$membership

clusters[['features.louvain.cluster']] = as_factor(louvain_clust)


#############
## Add cluster labels to analysis data structure
#############
experiment@dna = add_analysis_layer(assay = experiment@dna, layer_name = 'umap_vaf_clusters', as_tibble(clusters))

plot UMAP and Clusters

plot all the different clusters on the same umap projection

p  = tapestri_scatterplot(
                 x = experiment@dna@analysis_layers$umap_vaf$x, 
                 y= experiment@dna@analysis_layers$umap_vaf$y, 
                 color_by = experiment@dna@analysis_layers$umap_vaf_clusters)
p = p + umap_theme() + ggtitle('umap_vaf_clusters')
p

color UMAP by genotypes

plotly so you can interactively show remove values

 #%>% select(!contains('chr2:198267'))

color_by = experiment@dna@data_layers$NGT %>% select(1:2) %>% mutate_all(as_factor) %>% mutate_all(recode_genotypes)
## Warning: Unknown levels in `f`: 2
## Warning: Unknown levels in `f`: 3
p  = tapestri_scatterplot(
                 x = experiment@dna@analysis_layers$umap_vaf$x, 
                 y= experiment@dna@analysis_layers$umap_vaf$y, 
                 color_by = color_by)
p = p + umap_theme() 
p = p + ggtitle('Color by Genotypes')
ggplotly(p)

violin plots

to do:

  • name the clusters based on signature
  • show heatmap of signature
color_by = experiment@dna@analysis_layers$umap_vaf_clusters$umap.kmean.cluster.2

p  = tapestri_scatterplot(
                 x = experiment@dna@analysis_layers$umap_vaf$x, 
                 y= experiment@dna@analysis_layers$umap_vaf$y, 
                 color_by = color_by)
p = p + umap_theme() + ggtitle('umap_vaf_clusters') + theme(legend.position = 'none')


v = tapestri_violinplot(clusters = color_by,
               features = experiment@protein@data_layers$normalized)
v = v + theme_bw() + theme(legend.position = "none",
                            axis.text.x = element_text(angle = 90, hjust = 1))
  
## pathwork magic
p / v

Color UMAP by Proteins

p  = tapestri_scatterplot(
                 x = experiment@dna@analysis_layers$umap_vaf$x, 
                 y= experiment@dna@analysis_layers$umap_vaf$y, 
                 color_by = experiment@protein@data_layers$normalized)
p = p + umap_theme() + scale_colour_gradient2(low="yellow", mid='grey', high="darkred") 
p = p + ggtitle('Color by Proteins')
p

Cluster by Proteins

clustering by kmeans and louvain on umap projection and raw features

##################
# do demension reduction and clustering a few different ways
##################

#dimensional reduction using umap
set.seed(111)
umap_values <- umap(experiment@protein@data_layers$normalized, scale=TRUE, metric="manhattan", init="laplacian", pca=20) 

umap_layer = tibble(    
      x = umap_values[,1],
      y = umap_values[,2]
)

experiment@protein = add_analysis_layer(assay = experiment@protein, layer_name = 'umap', umap_layer)
  
######
# Hold all the different customer labels in a single structure
#####
cluster_by = experiment@protein@analysis_layers$umap
clusters = list()

#### do the clustering

for(i in 2:5) {
  kmean_values <- kmeans(cluster_by, i ,iter.max=500)
  clusters[[paste0('umap.kmean.cluster.',i)]] = as_factor(kmean_values$cluster)

  kmean_values <- kmeans(cluster_by, i ,iter.max=500)
  clusters[[paste0('feature.kmean.cluster.',i)]] = as_factor(kmean_values$cluster)

}

graph_values <- buildSNNGraph(t(cluster_by), k=150)
## Warning in (function (jobs, data, centers, info, distance, k, get.index, : tied
## distances detected in nearest-neighbor calculation
louvain_clust <- igraph::cluster_louvain(graph_values)$membership

clusters[['umap.louvain.cluster']] = as_factor(louvain_clust)

graph_values <- buildSNNGraph(t(cluster_by), k=150)
## Warning in (function (jobs, data, centers, info, distance, k, get.index, : tied
## distances detected in nearest-neighbor calculation
louvain_clust <- igraph::cluster_louvain(graph_values)$membership

clusters[['features.louvain.cluster']] = as_factor(louvain_clust)


#############
## Add cluster labels to analysis data structure
#############
experiment@protein = add_analysis_layer(assay = experiment@protein, layer_name = 'clusters', as_tibble(clusters))

plot UMAP and Clusters

plot all the different clusters on the same umap projection

p  = tapestri_scatterplot(
                 x = experiment@protein@analysis_layers$umap$x, 
                 y= experiment@protein@analysis_layers$umap$y, 
                 color_by = experiment@protein@analysis_layers$clusters)

p = p + xlab('') + ylab('')
p = p + umap_theme() 
p

violin plots

to do:

  • name the clusters based on signature
  • show heatmap of signature
p_umap  = tapestri_scatterplot(
                 x = experiment@protein@analysis_layers$umap$x, 
                 y= experiment@protein@analysis_layers$umap$y, 
                 color_by = experiment@protein@analysis_layers$clusters$features.louvain.cluster) + 
  xlab('') + ylab('') + umap_theme() 

p_violin = tapestri_violinplot(
           clusters = experiment@protein@analysis_layers$clusters$features.louvain.cluster,
           features = experiment@protein@data_layers$normalized)

p_umap / p_violin

Color UMAP by features

p  = tapestri_scatterplot(
                 x = experiment@protein@analysis_layers$umap$x, 
                 y= experiment@protein@analysis_layers$umap$y, 
                 color_by = experiment@protein@data_layers$normalized)
p = p + umap_theme() + scale_colour_gradient2(low="yellow", mid='grey', high="darkred") 
p

To do

CNV

data_to_plot = tibble(
  experiment@dna@analysis_layers$clusters,
  analysis_df$cnv$features$read_counts
)

df_long = data_to_plot %>% 
  pivot_longer(-c(contains('cluster')), 
               names_to = 'feature',values_to = 'value') %>% 
  mutate(feature = as_factor(feature))

# group by clusters
p = ggplot(data=df_long 
           #%>% filter(feature %in% colnames(analysis_df$amplicon$features)[30:60])
     )
p = p + geom_boxplot(aes(x=feature, y=value, fill=umap.kmean.cluster.2), outlier.shape = NA, notch = TRUE )
#p = p + facet_wrap(~umap.kmean.cluster, ncol = 1)
#p = p + geom_smooth(aes(x=as.numeric(feature), y=value, color=umap.kmean.cluster), method = 'loess',se=FALSE)
p = p + xlab('') + ylab('')
p = p + theme_bw()  + theme(legend.position = "none",
              axis.text.x = element_text(angle = 90, hjust = 1))
p

Heatmap